from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, make_scorer
from pathlib import Path

def write_hpValues(model_Name, HP_Values, dataset):
    """Append one hyper-parameter record to the model's results file.

    The record is written to ``results/<dataset>/<model_Name>/BestParam.txt``
    (relative to the current working directory); missing directories are
    created. Each record is the string form of ``HP_Values`` followed by a
    dashed separator line.

    Args:
        model_Name: Name of the model; used as the innermost directory name.
        HP_Values: Value to record; converted with ``str()``.
        dataset: Name of the dataset; used as the directory under ``results``.

    Returns:
        Always ``0``.
    """
    # parents=True creates results/<dataset> as well, so one call is enough.
    out_dir = Path("results/%s/%s" % (dataset, model_Name))
    out_dir.mkdir(parents=True, exist_ok=True)

    with (out_dir / "BestParam.txt").open("a", encoding="utf-8") as x_file:
        x_file.write(str(HP_Values))
        # The separator is newline-terminated so that the next appended
        # record starts on its own line instead of being glued to the dashes.
        x_file.write("\n----------------------------------------------------------\n")
    return 0


def RandomForest_hyperparameterTuning(X_train, Y_train, dataset):
    """Run a randomized hyper-parameter search for a random forest classifier.

    Draws 100 parameter combinations from the value lists below and scores
    each one by accuracy using 5-fold shuffled cross-validation. The best
    estimator's description is appended to the results file through
    ``write_hpValues`` and printed.

    Args:
        X_train: Training features passed to ``RandomizedSearchCV.fit``.
        Y_train: Training labels passed to ``RandomizedSearchCV.fit``.
        dataset: Dataset name, forwarded to ``write_hpValues``.

    Returns:
        The fitted ``RandomizedSearchCV`` object.
    """
    # Candidate values the search samples from.
    grid_Param = {
        'n_estimators': [10, 100, 500, 800, 1500, 2000, 3000, 5000],
        'criterion': ['gini', 'entropy'],
        # 'auto' was an alias of 'sqrt' for classifiers and has been removed
        # from scikit-learn, so it is left out to avoid failing candidates.
        'max_features': ['sqrt', 'log2'],
        # None (the object, not the string 'None') means unlimited depth;
        # the string is not a valid value and makes the fit fail.
        'max_depth': [10, 20, 30, 50, 70, 100, 200, 300, 400, None],
        'min_samples_split': [4, 7, 10, 20, 30, 35, 50, 100, 200],
        'min_samples_leaf': [1, 4, 7, 9, 15, 20, 30, 40, 50],
        'bootstrap': [True, False],
    }
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    RandomForest = RandomForestClassifier()
    # NOTE: error_score=0 means a candidate whose fit raises is scored 0
    # rather than aborting the search.
    grid_search = RandomizedSearchCV(
        estimator=RandomForest,
        param_distributions=grid_Param,
        n_iter=100,
        cv=kfold,
        verbose=2,
        scoring='accuracy',
        error_score=0,
        random_state=42,
        n_jobs=-1,
    )
    grid_search.fit(X_train, Y_train)

    best_grid = grid_search.best_estimator_
    write_hpValues('RandomForest', str(best_grid), dataset)

    print(best_grid)
    return grid_search


def RandomForest_training_tuning(X_train, Y_train, dataset):
    """Tune a random forest on the training data and return the fitted model.

    Args:
        X_train: Training features.
        Y_train: Training labels.
        dataset: Dataset name, forwarded to the hyper-parameter search.

    Returns:
        A ``RandomForestClassifier`` configured with the best parameters
        found by the search and fitted on ``X_train`` / ``Y_train``.
    """
    best_random = RandomForest_hyperparameterTuning(X_train, Y_train, dataset)
    # The search is run with the default refit=True, so best_estimator_ is
    # already a forest with the best parameters fitted on the full training
    # data; fitting a second, identical forest would only repeat that work.
    return best_random.best_estimator_
